package cn.micai.util;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;

import java.io.File;
import java.io.IOException;

/**
 * Description: PDFBox – how to read a PDF file in Java.
 * <p>
 * Loads a PDF document with PDFBox and prints its extracted text to standard output.
 *
 * @author 赵新国
 * @date 2018/6/25 17:09
 */
public class ReadPdfUtil {

    /** PDF that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PDF_PATH = "D://英语常用短语1.pdf";

    /**
     * Extracts the text of a PDF file and prints it to standard output, prefixed with
     * {@code "Text: "}.
     * <p>
     * Encrypted documents are not extracted; a notice is written to standard error instead.
     * An {@link IOException} raised while loading or reading the document is reported on
     * standard error together with the path that was being read.
     *
     * @param args optional command-line arguments; {@code args[0]}, when present, is the path
     *             of the PDF to read, otherwise {@link #DEFAULT_PDF_PATH} is used
     */
    public static void main(String[] args) {
        String pdfPath = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        File pdfFile = new File(pdfPath);

        // try-with-resources closes the document on every exit path, including the early return.
        try (PDDocument pdDocument = PDDocument.load(pdfFile)) {

            if (pdDocument.isEncrypted()) {
                // Report the skip explicitly instead of exiting with no output at all.
                System.err.println("Skipping encrypted PDF: " + pdfFile);
                return;
            }

            PDFTextStripper textStripper = new PDFTextStripper();
            String pdfFileInText = textStripper.getText(pdDocument);
            System.out.println("Text: " + pdfFileInText);

        } catch (IOException e) {
            // Name the file so the stack trace can be tied to the input that failed.
            System.err.println("Failed to read PDF: " + pdfFile);
            e.printStackTrace();
        }
    }
}
